from os import system, chdir
import subprocess
from urllib import urlopen

import py

# Base URL of the directory serving the IRC logs, and the name of the
# compressed archive stored there.
log_URL = 'http://tismerysoft.de/pypy/irc-logs/'
archive_FILENAME = 'pypy.tar.gz'

# Scratch directory handed out by py.test.
tempdir = py.test.ensuretemp("irc-log")

# Download the compressed archive into the scratch directory and unpack it
# there.  The final chdir expects a 'pypy' subdirectory to exist afterwards
# (presumably created by unpacking the archive) and raises OSError otherwise.
chdir(str(tempdir))
system('wget -q ' + log_URL + archive_FILENAME)
system('tar xzf ' + archive_FILENAME)
chdir('pypy')

# get more recent daily logs
#
# The remote directory index is scanned line by line for references to
# '#pypy.log.*' files ('#' appears URL-encoded as '%23' in the listing).
# Every match is downloaded with wget, which runs in the current working
# directory.
pypydir = tempdir.join('pypy')
index_page = urlopen(log_URL + 'pypy/')
try:
    index_lines = index_page.readlines()
finally:
    # Release the HTTP connection even if reading the index fails.
    index_page.close()
for line in index_lines:
    i = line.find('%23pypy.log.')
    if i == -1:
        continue
    # The file name is everything from the match up to the next double quote.
    filename = line[i:].split('"')[0]
    # The name comes from a remote page, so it is handed to wget as a single
    # argument without going through a shell: nothing in it can be
    # interpreted as shell syntax.
    subprocess.call(['wget', '-q', '%spypy/%s' % (log_URL, filename)])

# rename to YYYYMMDD
#
# Two naming schemes are normalised to '#pypy.log.YYYYMMDD':
#   - names containing dashes: the dashes are removed
#   - 19-character names laid out as '#pypy.log.' + DD + Mon + YYYY
# Any other name is left untouched.
months = 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
for log_filename in pypydir.listdir('#pypy.log.*'):
    rename_to = None
    b = log_filename.basename
    if '-' in b:
        rename_to = b.replace('-', '')
    elif len(b) == 19:
        day   = b[10:12]
        # index() raises ValueError if the three letters are not a known
        # English month abbreviation.
        month = months.index(b[12:15]) + 1
        year  = b[15:19]
        rename_to = '#pypy.log.%04s%02d%02s' % (year, month, day)

    if rename_to:
        # Build the target next to the source file so that the rename does
        # not depend on the process's current working directory.
        log_filename.rename(log_filename.dirpath().join(rename_to))

# print sorted list of filenames of daily logs
print 'irc://irc.freenode.org/pypy'
print 'date, messages, visitors'
for log_filename in pypydir.listdir('#pypy.log.*'):
    n_messages, visitors = 0, {}
    f = str(log_filename)
    for s in file(f):
        if '<' in s and '>' in s:
            n_messages += 1
        elif ' joined #pypy' in s:
            v = s.split()[1]
            visitors[v] = True
    print '%04s-%02s-%02s, %d, %d' % (f[-8:-4], f[-4:-2], f[-2:], n_messages, len(visitors.keys()))
